#!/bin/bash
#
# Launch a verl GRPO training run for Qwen2.5-VL-3B with KLD-prefill /
# freeze-prefill enabled on layers 0-18, using the MMRL30k k12 splits.
#
# Required environment:
#   HUGGINGFACE_TOKEN - HF access token (must be exported by the caller;
#                       never hardcode it here — a committed token is a
#                       credential leak, and `set -x` would echo it to logs).

set -euo pipefail

# Validate the secret BEFORE enabling xtrace, so the token value never
# appears in trace output.
: "${HUGGINGFACE_TOKEN:?export HUGGINGFACE_TOKEN before running this script}"
export HUGGINGFACE_TOKEN

set -x

export PYTHONUNBUFFERED=1
export VLMEvalKit=/home/tione/notebook/xingy/longcot/VLMEvalKit

# Shared cache roots so repeated runs reuse downloaded weights/datasets.
export HF_HOME=/home/tione/notebook/datasets/pretrained/cache/huggingface
export HF_HUB_CACHE=/home/tione/notebook/datasets/pretrained/cache/huggingface/hub
export HF_DATASETS_CACHE=/home/tione/notebook/datasets/pretrained/cache/datasets
export TORCH_HOME=/home/tione/notebook/datasets/pretrained/cache/torch
export LMUData=/home/tione/notebook/datasets/pretrained/cache/LMUData

# Drop into the Ray post-mortem debugger on worker failure.
export RAY_DEBUG_POST_MORTEM=1

MODEL_PATH=../model/Qwen2.5-VL-3B-Instruct  # replace it with your local file path

# NOTE(review): experiment/checkpoint names say "geo3k" but the data files
# point at MMRL30k k12 splits — confirm the naming is intentional.
python3 -m verl.trainer.main \
    config=examples/config.yaml \
    data.train_files=XenoZLH/MMRL30k@k12_train \
    data.val_files=XenoZLH/MMRL30k@k12_test \
    worker.actor.model.model_path="${MODEL_PATH}" \
    worker.actor.kldprefill=true \
    worker.actor.kldprefill_layerwin=0,18 \
    worker.actor.freezeprefill=true \
    worker.actor.freezeprefill_layerwin=0,18 \
    worker.rollout.tensor_parallel_size=1 \
    worker.rollout.gpu_memory_utilization=0.5 \
    algorithm.pref_kl_coef=0.01 \
    trainer.experiment_name=debug_kldprefill_qwen2_5_vl_3b_geo3k_grpo_lw1e-2_ly_00_18 \
    trainer.n_gpus_per_node=8 \
    trainer.save_limit=1 \
    trainer.save_checkpoint_path="../pdboutputs/EasyR1/checkpoints/debug_kldprefill_qwen2_5_vl_3b_geo3k_grpo_lw1e-2_ly_00_18"
